"""
# coding     : utf-8 
# Time       : 2025/6/19 10:42
# Author     : chenxianb
# version    : python 3.8.2
# Description: 获取猫眼电影经典影片   https://www.maoyan.com/films?showType=3&sortId=3
"""
import os.path
import re
import time

import openpyxl
import requests


class Movie:
    """Scraper for the Maoyan classic-films listing page.

    Typical use: download the listing HTML with :meth:`get_html_text`,
    turn it into rows with :meth:`get_movie`, then write the rows to an
    Excel workbook with :meth:`save_excel`.
    """

    # Browser-like User-Agent sent with every request.
    _HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:139.0) Gecko/20100101 Firefox/139.0"}

    # Compiled once at class level. Every pattern uses a capture group so the
    # surrounding markup never has to be stripped by hand, and tolerates any
    # amount of whitespace between the label and its text.
    _NAME_RE = re.compile(r'<span class="name ">(.*?)</span>')
    _SCORE_RE = re.compile(r'<i class="integer">(.*?)</i><i class="fraction">(.*?)</i></span>')
    _TYPE_RE = re.compile(r'类型:</span>\s*([^<]*)')
    _ACTOR_RE = re.compile(r'主演:</span>\s*([^<]*)')
    _DATE_RE = re.compile(r'上映时间:</span>\s*([^<]*)')

    def get_html_text(self, url, timeout=10):
        """Download *url* and return its body as text.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot block the script forever.

        Returns:
            The decoded response body, or ``""`` when the request fails
            (connection error, timeout or non-2xx status).
        """
        try:
            r = requests.get(url, headers=self._HEADERS, timeout=timeout)
            r.raise_for_status()
            # Use the encoding detected from the content rather than the
            # one guessed from the response headers.
            r.encoding = r.apparent_encoding
            return r.text
        except requests.RequestException as exc:
            # Best effort: report the failure and fall back to an empty page.
            print(f"请求失败: {exc}")
            return ""

    @staticmethod
    def _field_text(pattern, section):
        """Return the stripped first capture of *pattern* in *section*, or ""."""
        hit = pattern.search(section)
        return hit.group(1).strip() if hit else ""

    def get_movie(self, html):
        """Extract one record per film from the listing HTML.

        The page is cut into sections, each starting at a film-name span and
        ending at the next one; every field is looked up inside that
        section only. A film that lacks a field (for example, no score)
        therefore gets an empty string for it instead of shifting the
        values of the following films or raising ``IndexError``.

        NOTE(review): this assumes each film's name span appears before its
        score/type/cast/date in the markup -- confirm against the live page.

        Args:
            html: Listing page source; may be empty.

        Returns:
            A list of ``[name, score, type, actors, release_date]`` lists of
            strings, in page order.
        """
        if not html:
            return []

        name_hits = list(self._NAME_RE.finditer(html))
        # Section i runs from the end of name i to the start of name i + 1
        # (or to the end of the document for the last film).
        section_ends = [hit.start() for hit in name_hits[1:]] + [len(html)]

        movie_lst = []
        for hit, end in zip(name_hits, section_ends):
            section = html[hit.end():end]

            score_hit = self._SCORE_RE.search(section)
            # Integer and fraction parts are rendered in separate tags.
            movie_score = "".join(score_hit.groups()).strip() if score_hit else ""

            movie_name = hit.group(1).strip()
            movie_type = self._field_text(self._TYPE_RE, section)
            movie_actor = self._field_text(self._ACTOR_RE, section)
            # Keep only the first space-separated token of the date text.
            movie_date = self._field_text(self._DATE_RE, section).split(" ")[0]

            movie_lst.append([movie_name, movie_score, movie_type, movie_actor, movie_date])
        return movie_lst

    def save_excel(self, path, info):
        """Write the film records to a new, timestamped ``.xlsx`` file.

        Args:
            path: Output directory; created (including parents) if missing.
            info: Iterable of rows as produced by :meth:`get_movie`.
        """
        os.makedirs(path, exist_ok=True)

        timestamp = int(time.time())
        file = os.path.join(path, f"猫眼电影{timestamp}.xlsx")

        # Workbook's first positional parameter is ``write_only``, not a file
        # name; the destination is given to ``save`` instead.
        wb = openpyxl.Workbook(write_only=True)
        table = wb.create_sheet("电影", 0)

        title = ["电影名", "评分", "类型", "主演", "上映日期"]
        table.append(title)

        for item in info:
            table.append(item)

        # Without this call nothing is ever written to disk.
        wb.save(file)
        print("保存成功")


if __name__ == "__main__":
    # Fetch the classic-films listing, parse it and export the rows to
    # a spreadsheet under ./movie/.
    scraper = Movie()
    page_source = scraper.get_html_text("https://www.maoyan.com/films?showType=3&sortId=3")
    records = scraper.get_movie(page_source)
    scraper.save_excel('./movie/', records)
